/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package htmlunit.code;

import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlHeading1;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSpan;
import com.gargoylesoftware.htmlunit.html.HtmlTable;
import com.gargoylesoftware.htmlunit.html.HtmlTableBody;
import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
import java.net.URLDecoder;
import java.util.List;
import htmlunit.mode.*;
import hibernate.Dao.crawlerDao;
import org.w3c.dom.html.HTMLDivElement;
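/**
 * Scrapes Facebook page, listing and photo-album URLs with HtmlUnit and stores the
 * extracted fields through {@code crawlerDao}.
 *
 * @author TiTa
 */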

    

 



public class parse_page_facebook{
    // Record that lay_thongtin_capnhat fills in and hands to cd.capnhat; the same
    // instance is reused for every call made on this parser.
    ThongTin tt=new ThongTin();
// Link attached to tt on every update; replaced by the one-argument constructor.
Link l=new Link();
// DAO whose capnhat(tt) is invoked by lay_thongtin_capnhat.
crawlerDao cd=new crawlerDao();
    // Opens pages: every HtmlPage in this class comes from kn.ketnoi(url).
    ket_noi_url kn = new ket_noi_url();
    // Not read or written by any live code visible in this class (it only appears in
    // commented-out lines).
    DomElement doc;
//   } else if (set_table != 0) {
    
     /**
      * Creates a parser that attaches the given link to every record it stores.
      *
      * @param l1 link that {@code lay_thongtin_capnhat} sets on the stored record
      */
     public parse_page_facebook(Link l1) {
         l = l1;
     }
    /** Creates a parser that keeps the freshly constructed default {@code Link}. */
    public parse_page_facebook() {
        super();
    }
  // Scratch fields written by the listing-page branch of parse_facebook_is_page_fbLongBlurb.
  // Text of the first <a> of the current listing entry; passed on as "tenquan".
  String  so_1_set_table;
// Text of the first row of the entry's detail-table body; passed on as "diachi".
String  dia_chi_set_table;
// Text of the second row of the entry's detail-table body; passed on as "songuoilike".
String so_nguoi_like;
 // href of the first <a> of the current listing entry; passed on as "linkwuan".
 String link_set_table;
 // Text of the first <a> inside the 'fsm fwn fcg' span; passed on as "loaiwuan".
 String span_a_set_table;
    // end of the shared ("global") declarations
 // Written by the branch taken when a span with class 'fsm fwn fcg' exists and no listing table does:
 // holds the text of the most recently read 'fbProfileBylineLabel' span.
    String row1;
    
    /**
     * Downloads {@code url}, detects which known Facebook layout the page uses and stores the
     * extracted fields through {@code lay_thongtin_capnhat}.
     *
     * <p>Layouts are tested in this order and the first match wins:
     * <ol>
     *   <li>a {@code span} with class {@code fbLongBlurb} exists;</li>
     *   <li>a {@code span} with class {@code fsm fwn fcg} exists and no listing grid table does;</li>
     *   <li>a {@code div} with class {@code fsm fwn fcg} exists and no listing grid table does
     *       (its byline labels are only printed, nothing is stored);</li>
     *   <li>a listing grid table (class {@code uiGrid _51mz _5a-}) exists.</li>
     * </ol>
     * Independently of the layout, a URL whose fourth {@code "/"}-separated segment is
     * {@code media} is additionally handed to {@link #media_set(String)}.
     *
     * <p>Every {@link Exception} raised while parsing is caught here: its stack trace is printed
     * and {@code "loi rui"} is written to standard output. Nothing is rethrown.
     *
     * @param url absolute URL of the page to parse
     * @throws Exception declared for source compatibility only; see above
     */
    public void parse_facebook_is_page_fbLongBlurb(String url) throws ExceptionInInitializerError, Exception {
        try {
            final String[] segments = url.split("/");
            final HtmlPage page = kn.ketnoi(url);

            final boolean has_long_blurb = !page.getByXPath("//span[@class='fbLongBlurb']").isEmpty();
            final boolean has_span_byline = !page.getByXPath("//span[@class='fsm fwn fcg']").isEmpty();
            final boolean has_div_byline = !page.getByXPath("//div[@class='fsm fwn fcg']").isEmpty();
            final boolean has_listing_grid = !page.getByXPath("//table[@class='uiGrid _51mz _5a-']").isEmpty();

            if (has_long_blurb) {
                parse_long_blurb_layout(page, url);
            } else if (has_span_byline && !has_listing_grid) {
                parse_span_byline_layout(page, url);
            } else if (has_div_byline && !has_listing_grid) {
                print_byline_labels(page);
            } else if (has_listing_grid) {
                parse_listing_layout(page);
            }

            // Deliberately a separate "if": album URLs are processed in addition to any layout above.
            // A URL without a path has fewer than four segments, so the index is checked first.
            if (segments.length > 3 && "media".equals(segments[3])) {
                // A separate parser keeps its own record; it is given this parser's link so the
                // album record stays attached to the link this parser was created for.
                new parse_page_facebook(l).media_set(url);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            System.out.print("loi rui");
        }
    }

    /**
     * Handles the {@code fbLongBlurb} layout: name and like count are read from {@code page},
     * additional rows from the first two info tables of the {@code url + "/info"} page.
     */
    private void parse_long_blurb_layout(final HtmlPage page, final String url) throws Exception {
        final HtmlSpan blurb = (HtmlSpan) page.getByXPath("//span[@class='fbLongBlurb']").get(0);
        final HtmlPage info_page = kn.ketnoi(url + "/info");

        final HtmlSpan name_span = (HtmlSpan) page.getByXPath("//span[@itemprop='name']").get(0);
        final String ten_wuan = name_span.getTextContent();
        System.out.println(ten_wuan);

        final HtmlSpan like_span = (HtmlSpan) page.getByXPath("//span[@class='_wj stats fwb']").get(0);
        final String so_luong_nguoi_like = like_span.getTextContent();

        final String[] info_rows = new String[10];
        @SuppressWarnings("unchecked")
        final List<HtmlTable> info_tables =
                (List<HtmlTable>) info_page.getByXPath("//table[@class='_5e7- profileInfoTable _3stn']");
        // At most two tables are read; a page with fewer no longer aborts the whole parse.
        final int table_count = Math.min(2, info_tables.size());
        for (int i = 0; i < table_count; i++) {
            // The slot index restarts for every table, so a later table overwrites the slots
            // filled by an earlier one (unchanged from the previous implementation).
            int slot = 0;
            for (final HtmlTableBody body : info_tables.get(i).getBodies()) {
                final List<HtmlTableRow> rows = body.getRows();
                if (rows.isEmpty() || slot >= info_rows.length) {
                    continue;
                }
                info_rows[slot] = rows.get(0).getTextContent();
                System.out.print(info_rows[slot] + "\n");
                slot++;
            }
        }

        // The blurb text is only printed, never stored.
        if (!blurb.getElementsByTagName("div").isEmpty()) {
            System.out.println(blurb.getElementsByTagName("div").get(0).getTextContent());
        }

        // Argument order: diachi, dienthoai, tenquan, songuoilike, loaiwuan, thoigianhoatdong, linkwuan.
        // The page name is passed for both tenquan and loaiwuan.
        lay_thongtin_capnhat(info_rows[1], info_rows[3], ten_wuan, so_luong_nguoi_like, ten_wuan, info_rows[0], url);
    }

    /**
     * Handles the layout with a {@code span} of class {@code fsm fwn fcg}: reads the name, the
     * like count and up to five {@code fbProfileBylineLabel} texts, then stores them.
     */
    private void parse_span_byline_layout(final HtmlPage page, final String url) {
        final HtmlSpan name_span = (HtmlSpan) page.getByXPath("//span[@itemprop='name']").get(0);
        final String ten_wuan = name_span.getTextContent();
        System.out.println(ten_wuan + "\n");

        final HtmlSpan like_span = (HtmlSpan) page.getByXPath("//span[@class='_wj stats fwb']").get(0);
        final String so_luong_nguoi_like = like_span.getTextContent();
        System.out.println(so_luong_nguoi_like);

        final List<?> labels = page.getByXPath("//span[@class='fbProfileBylineLabel']");
        final String[] byline = new String[5];
        System.out.println(labels.size());
        for (int i = 0; i < labels.size(); i++) {
            row1 = ((HtmlSpan) labels.get(i)).getTextContent();
            System.out.println(row1 + "\n");
            // Only the first five labels are kept; extra labels used to overflow the array.
            if (i < byline.length) {
                byline[i] = row1;
            }
            // The label is echoed a second time, matching the existing console output.
            System.out.println(row1 + "\n");
        }

        lay_thongtin_capnhat(byline[1], byline[2], ten_wuan, so_luong_nguoi_like, byline[0], byline[3], url);
    }

    /** Prints the text of every {@code fbProfileBylineLabel} span; nothing is stored. */
    private void print_byline_labels(final HtmlPage page) {
        for (final Object label : page.getByXPath("//span[@class='fbProfileBylineLabel']")) {
            System.out.println(((HtmlSpan) label).getTextContent() + "\n");
        }
    }

    /**
     * Handles a listing page: for each entry reads name, link and category from the
     * {@code _69g} div and address / like count from the matching {@code _69i} table, storing
     * one record per table body.
     */
    private void parse_listing_layout(final HtmlPage page) {
        final int entry_count = page.getByXPath("//div[@class='_69f']").size();
        final List<?> entries = page.getByXPath("//div[@class='_69g']");
        @SuppressWarnings("unchecked")
        final List<HtmlTable> detail_tables = (List<HtmlTable>) page.getByXPath("//table[@class='_69i']");
        // The three lists are indexed in parallel, so never run past the shortest one.
        final int usable = Math.min(entry_count, Math.min(entries.size(), detail_tables.size()));

        for (int i = 0; i < usable; i++) {
            final HtmlDivision entry = (HtmlDivision) entries.get(i);
            if (entry.getElementsByTagName("a").isEmpty()) {
                continue; // no anchor means neither a name nor a link to store
            }
            so_1_set_table = entry.getElementsByTagName("a").get(0).getTextContent();
            link_set_table = entry.getElementsByTagName("a").get(0).getAttribute("href");

            // ".//" keeps the search inside this entry; a leading "//" restarts at the document
            // root and would return the first span of the whole page for every entry.
            span_a_set_table = null;
            final List<?> category_spans = entry.getByXPath(".//span[@class='fsm fwn fcg']");
            if (!category_spans.isEmpty()) {
                final HtmlSpan category = (HtmlSpan) category_spans.get(0);
                if (!category.getElementsByTagName("a").isEmpty()) {
                    span_a_set_table = category.getElementsByTagName("a").get(0).getTextContent();
                }
            }

            System.out.println(link_set_table+"qwe?????????????");
            System.out.println(span_a_set_table+"asd?????????????");
            System.out.println(so_1_set_table+"ghj?????????");

            for (final HtmlTableBody body : detail_tables.get(i).getBodies()) {
                final List<HtmlTableRow> rows = body.getRows();
                if (rows.size() < 2) {
                    continue; // both the address row and the like-count row are required
                }
                System.out.println(rows.get(0).getTextContent()+"bbiu??????????????");
                System.out.println(rows.get(1).getTextContent()+"uhu???????????????????????/");
                dia_chi_set_table = rows.get(0).getTextContent();
                so_nguoi_like = rows.get(1).getTextContent();

                lay_thongtin_capnhat(dia_chi_set_table, null, so_1_set_table, so_nguoi_like, span_a_set_table, null, link_set_table);
            }
        }
    }

    /**
     * Downloads a photo-album page and stores its title and caption.
     *
     * <p>The title comes from the first {@code h1} with class {@code fbPhotoAlbumTitle}, the
     * caption from the first {@code span} with class {@code fbPhotoCaptionText}. Title, caption
     * and the number of {@code div.tagWrapper} elements are printed to standard output.
     *
     * <p>The earlier per-photo loop was removed: its decoded value was never used, but any
     * failure inside it skipped the store call. The photo style string it read is the
     * {@code style} attribute of the first {@code <i>} inside each {@code tagWrapper} div, and
     * {@link #cat_theo_ki_tu(String)} remains available to trim it.
     *
     * <p>Exceptions are caught and their stack trace printed; nothing is rethrown.
     *
     * @param media URL of the album page
     */
    public void media_set(String media) {
        try {
            final HtmlPage page = kn.ketnoi(media);

            final List<?> titles = page.getByXPath("//h1[@class='fbPhotoAlbumTitle']");
            final List<?> captions = page.getByXPath("//span[@class='fbPhotoCaptionText']");
            if (titles.isEmpty() || captions.isEmpty()) {
                // Not an album page (or the markup changed): there is nothing to store.
                System.err.println("media_set: album title or caption not found at " + media);
                return;
            }

            final String h1_text = ((HtmlHeading1) titles.get(0)).getTextContent();
            final String span_area_text = ((HtmlSpan) captions.get(0)).getTextContent();
            final int so_hinh = page.getByXPath("//div[@class='tagWrapper']").size();

            System.out.println(h1_text + "\n" + span_area_text);
            System.out.println(so_hinh);

            // Argument order: diachi, dienthoai, tenquan, songuoilike, loaiwuan, thoigianhoatdong, linkwuan.
            // NOTE(review): the album URL is also passed for dienthoai, songuoilike and loaiwuan;
            // this looks like a placeholder - confirm whether null is intended there.
            lay_thongtin_capnhat(h1_text, media, h1_text, media, media, span_area_text, media);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Placeholder for fetching a note; the body is empty, so calling it has no effect.
     *
     * @param note currently unused
     */
    public void fetch_note(String note) {
        // Notes only exist for a limited period of time - is it worth fetching them at all?
    }
/**
 * Writes the given values, together with the current link {@code l}, into the shared
 * {@code tt} record and passes that record to {@code cd.capnhat}.
 *
 * @param diachi           value for {@code setDiaChi} (address)
 * @param dienthoai        value for {@code setDienThoai} (phone)
 * @param tenquan          value for {@code setTenQuan} (venue name)
 * @param songuoilike      value for {@code setSoNguoiLike} (like count)
 * @param loaiwuan         value for {@code setLoaiWuan} (venue type)
 * @param thoigianhoatdong value for {@code setThoiGianHoatDong} (opening hours)
 * @param linkwuan         value for {@code setLinkWuan} (venue link)
 */
public void lay_thongtin_capnhat(String diachi, String dienthoai, String tenquan, String songuoilike,
        String loaiwuan, String thoigianhoatdong, String linkwuan) {
    final ThongTin info = tt;

    info.setLink(l);
    info.setDiaChi(diachi);
    info.setDienThoai(dienthoai);
    info.setTenQuan(tenquan);
    info.setSoNguoiLike(songuoilike);
    info.setLoaiWuan(loaiwuan);
    info.setThoiGianHoatDong(thoigianhoatdong);
    info.setLinkWuan(linkwuan);

    cd.capnhat(info);
}
    /**
     * Cuts a fixed-layout string down to the part this crawler keeps.
     *
     * <p>The first 22 and the last 2 characters are dropped; from the remainder, the 9
     * characters at offsets 58..66 are removed as well. In terms of the original string the
     * result is {@code input[22, 80) + input[89, length - 2)}.
     *
     * <p>NOTE(review): 22 and 2 match the lengths of {@code "background-image: url("} and
     * {@code ");"}, so the input is presumably an inline {@code style} value and the removed
     * middle part a size segment of the image URL - confirm against real pages.
     *
     * @param input string to cut; must be at least 91 characters long
     * @return the shortened string
     * @throws StringIndexOutOfBoundsException if {@code input} is shorter than 91 characters
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public String cat_theo_ki_tu(String input) {
        final int prefix_length = 22;
        final int suffix_length = 2;
        final int kept_head = 58;
        final int resume_at = 67;
        final int min_length = prefix_length + resume_at + suffix_length;

        if (input.length() < min_length) {
            // Same exception type the substring calls used to raise, with a readable message.
            throw new StringIndexOutOfBoundsException(
                    "cat_theo_ki_tu needs at least " + min_length + " characters but got " + input.length());
        }

        final String trimmed = input.substring(prefix_length, input.length() - suffix_length);
        return trimmed.substring(0, kept_head) + trimmed.substring(resume_at);
    }

    
    
    
    
    
    
  
    /**
     * Manual entry point: parses one URL with a default-constructed parser.
     *
     * @param args optional; {@code args[0]} is the URL to parse. Without arguments the built-in
     *             sample URL is used, as before.
     */
    public static void main(String[] args) throws ExceptionInInitializerError, Exception {
        /*
         * Other URLs that were used for manual runs:
         *   https://vi-vn.facebook.com/mihanquoc
         *   https://www.facebook.com/pages/City-House-Cafe/613547148675414
         *   https://vi-vn.facebook.com/notes/david-doan/danh-b%E1%BA%A1-qu%C3%A1n-%C4%83n-t%E1%BA%A1i-tp-hcm/220557731335934
         *   https://www.facebook.com/Teecafevn   (marked "ok")
         *   https://www.facebook.com/pages/N%C3%A9t-Qu%E1%BA%A3ng-Coffee-106-Ba-V%C3%A2n-P-14-Q-T%C3%A2n-B%C3%ACnh-Tp-Hcm/405530006217606   (marked "ok")
         *   https://www.facebook.com/pages/Cafe-Zeus/473517639377982
         *   https://www.facebook.com/media/set/?set=a.10151253390061951.452237.135833866950&type=1
         */
        final String default_url = "https://vi-vn.facebook.com/DeSuaRecRec";
        final String url = (args != null && args.length > 0) ? args[0] : default_url;

        new parse_page_facebook().parse_facebook_is_page_fbLongBlurb(url);
    }
}

